import pandas as pd
import geopandas as gpd
import shapely
import plotly.express as px
import numpy as np
import json
from datetime import datetime
from sodapy import Socrata
import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook"
# Load geospatial and tabular source data.
# Use forward slashes so the paths work on every platform: backslash paths
# like 'data\Borough...' rely on unrecognized escape sequences (deprecated
# in CPython) and break outside Windows.
boroughs = gpd.read_file('data/Borough Boundaries.zip')
lines_gdf = gpd.read_file('data/Subway Lines.zip')
stations = pd.read_csv('data/MTA_Subway_Stations_20240214.csv')
# Build a GeoDataFrame from the WKT strings in the 'Georeference' column.
stations_gdf = gpd.GeoDataFrame(data=stations,
                                geometry=stations['Georeference']
                                .apply(shapely.wkt.loads))
delays = pd.read_csv('data/MTA_Subway_Trains_Delayed__Beginning_2020_20240214.csv')
#total number of stations in the system
len(stations_gdf['Station ID'].unique())
493
# ADA accessibility codes: 0 = not accessible; 1 = fully accessible;
# 2 = partially accessible (one direction only)
stations_gdf['ADA_labels'] = stations_gdf.ADA.map({0:"non-compliant",
1:"ADA-compliant",
2:"partially compliant"})
# rename for nicer hover labels in the maps below
stations_gdf.rename(columns={'Stop Name':'Station'}, inplace=True)
# percent of stations that are ADA compliant in 2024
stations_gdf.ADA_labels.value_counts(normalize=True)
non-compliant 0.697581 ADA-compliant 0.286290 partially compliant 0.016129 Name: ADA_labels, dtype: float64
# Mapbox access token is read from a local file (kept out of version control).
px.set_mapbox_access_token(open("mapbox_token").read())
# constant marker-size column so px.scatter_mapbox draws uniform dots
stations_gdf['marker_size'] = 3
# plot subway stations, colored by ADA compliance status
fig = px.scatter_mapbox(stations_gdf,
lon=stations_gdf['GTFS Longitude'],
lat=stations_gdf['GTFS Latitude'],
hover_data={'Station':True, 'GTFS Latitude':False,'GTFS Longitude':False, 'ADA_labels':False},
color="ADA_labels",
labels={"ADA_labels": "Subway Station ADA Compliance"},
size = 'marker_size',
size_max = 5,
width=1200, height=1000,
zoom=10)
# plot subway lines on top of the station map.
# Flatten all line geometries into lat/lon/name sequences with None
# separators (Plotly breaks the polyline at None). Plain list.extend is
# used instead of np.append, which re-copies the whole array on every
# call and makes the loop quadratic.
lats = []
lons = []
names = []
for feature, name in zip(lines_gdf.geometry, lines_gdf.name):
    if isinstance(feature, shapely.geometry.linestring.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.multilinestring.MultiLineString):
        linestrings = feature.geoms
    else:
        # skip any non-line geometry
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        lats.extend(y)
        lons.extend(x)
        names.extend([name] * len(y))
        # None separates consecutive segments so they are not connected
        lats.append(None)
        lons.append(None)
        names.append(None)
fig.add_trace(px.line_mapbox(lat=lats, lon=lons,
labels={'hover_data_0':'lines'},
hover_name=names,
).data[0])
fig.update_traces(line={'width': 1})
# reverse trace order so stations render on top of the lines
fig.data = fig.data[::-1]
fig.show()
# calculate the total track length of each subway line
subway_line_list = ['1', '2', '3', '4', '5', '6', '7', 'A', 'B', 'C', 'D', 'E', 'F',
                    'G', 'J-Z', 'L', 'M', 'N', 'Q', 'R']
subway_line_length = {}
# loop through each subway line
for line in subway_line_list:
    subway_line_length[line] = 0  # initialize 0 length for each subway line
    # add the segment's shape length whenever the current line appears in
    # the row's name (a geometry serving several lines counts for each).
    # Columns are addressed positionally via .iloc (plain row[1][1] relied
    # on deprecated positional Series indexing): .iloc[1] is the name,
    # .iloc[4] is the shape length.
    for _, row in lines_gdf.iterrows():
        if line in str(row.iloc[1]):
            subway_line_length[line] += row.iloc[4]
subway_line_length
# testing above code works
# lines_gdf[lines_gdf.name.str.contains('A')].sum()[4]
# lines_gdf[lines_gdf.name.str.contains('1')].sum()[4]
{'1': 80688.49153911599,
'2': 140478.29424623805,
'3': 100918.98820500403,
'4': 135540.12091733,
'5': 161968.29187684198,
'6': 84055.2400603,
'7': 54718.379981503,
'A': 197918.53738361102,
'B': 154071.13705718605,
'C': 108888.57429322302,
'D': 160600.92606391906,
'E': 113068.626969864,
'F': 152976.558610879,
'G': 57745.32480826,
'J-Z': 73964.34249342198,
'L': 53486.00531743,
'M': 107209.47175261998,
'N': 116975.52983082198,
'Q': 102089.028988739,
'R': 134804.671964215}
# calculations for delays per line
# total number of delays for each line, by month
# NOTE: there are multiple types of delays, so they are summed together to
# get one total count per month per line
delays.groupby(by=['line','month']).sum()
# average monthly number of delays for each line between 2020-01 and 2023-12
# (mean of the total monthly delays computed in the previous step)
delays_by_line = delays.groupby(by=['line','month']).sum().groupby(by='line').mean().round(2).drop(columns=['day_type'])
delays_by_line = delays_by_line[0:-1] # drop the last row (stationwide delays)
# average number of monthly delays across all lines between 2020-01 and 2023-12
# (.iloc avoids deprecated positional [] access on a labeled Series)
avg_delays = delays_by_line.mean().iloc[0]
# TODO: the delay data labels the line 'JZ' while the length data uses
# 'J-Z'; rename before merging, e.g.
# delays.line = delays.line.replace('JZ', 'J-Z')
# convert subway_line_length to a dataframe & merge with delays_by_line
line_length_df = pd.DataFrame.from_dict(subway_line_length, orient='index', columns=['length']
                                        ).reset_index().rename(columns={'index':'line'}).round(2)
# move 'line' from the index into a regular column for the merge
# (the original called reset_index() without assigning the result — a no-op)
delays_by_line = delays_by_line.reset_index()
delays_by_line = pd.merge(left=delays_by_line, left_on='line',
                          right=line_length_df, right_on='line',
                          how='outer')
# calculate delays per unit of subway line length
delays_by_line['delays_per_length'] = round(delays_by_line.delays / delays_by_line.length, 4)
delays_by_line.rename(columns={'line':'Subway Line',
                               'delays': 'Average number of delays',
                               'length': 'Subway line length',}, inplace=True)
# lines with the most overall delays
delays_by_line.sort_values(by='Average number of delays', ascending=False).head()
| Subway Line | Average number of delays | Subway line length | delays_per_length | |
|---|---|---|---|---|
| 17 | N | 2802.90 | 116975.53 | 0.0240 |
| 7 | A | 2749.15 | 197918.54 | 0.0139 |
| 12 | F | 2422.29 | 152976.56 | 0.0158 |
| 5 | 6 | 2317.96 | 84055.24 | 0.0276 |
| 11 | E | 2224.12 | 113068.63 | 0.0197 |
# lines with the most delays per unit of track length
delays_by_line.sort_values(by='delays_per_length', ascending=False).head()
| Subway Line | Average number of delays | Subway line length | delays_per_length | |
|---|---|---|---|---|
| 5 | 6 | 2317.96 | 84055.24 | 0.0276 |
| 6 | 7 | 1449.40 | 54718.38 | 0.0265 |
| 17 | N | 2802.90 | 116975.53 | 0.0240 |
| 0 | 1 | 1627.23 | 80688.49 | 0.0202 |
| 15 | L | 1066.65 | 53486.01 | 0.0199 |
# MTA NYCT Subway Elevator and Escalator Availability: Beginning 2015
# SECURITY NOTE: the Socrata app token is hard-coded in source; move it to
# an environment variable or an untracked local file (like the mapbox
# token above) before sharing this notebook.
MyAppToken = 'm2MgET0gzpDk2Is9e4TOVpdZ3'
# SoQL query: elevator records with at least one outage, newest first.
# (plain string — the original f-string had no placeholders)
query = """SELECT *
WHERE total_outages>0 AND equipment_type='Elevator'
ORDER BY month DESC
LIMIT 15000"""
client = Socrata("data.ny.gov",
                 MyAppToken)
results = client.get("rc78-7x78", query=query)
elevator_outages_df = pd.DataFrame.from_records(results)
# cleaning up data into better formats...
# np.nan replaces np.NaN (removed in NumPy 2.0); plain assignment replaces
# the deprecated chained inplace .replace()
elevator_outages_df['time_since_major_improvement'] = (
    elevator_outages_df['time_since_major_improvement'].replace('N/A', np.nan))
elevator_outages_df.month = pd.to_datetime(elevator_outages_df.month, format='%Y-%m-%dT%H:%M:%S.000')
# keep records from 2020-01 onward
# NOTE(review): the original comment said "2021 through 2023" but the
# filter starts at 2020 — confirm which range is intended
elevator_outages_df = elevator_outages_df[elevator_outages_df.month > '2019-12-01']
# numeric columns arrive from the Socrata API as strings; cast to float
columns = ['total_outages',
           'scheduled_outages', 'unscheduled_outages', 'entrapments',
           'time_since_major_improvement', 'am_peak_availability',
           'am_peak_hours_available', 'am_peak_total_hours',
           'pm_peak_availability', 'pm_peak_hours_available',
           'pm_peak_total_hours', '_24_hour_availability',
           '_24_hour_hours_available', '_24_hour_total_hours',
           'station_mrn', 'station_complex_mrn']
# single vectorized cast instead of a per-column Python loop
elevator_outages_df[columns] = elevator_outages_df[columns].astype('float')
elevator_outages_df.head()
| month | borough | equipment_type | equipment_code | total_outages | scheduled_outages | unscheduled_outages | entrapments | time_since_major_improvement | am_peak_availability | ... | pm_peak_availability | pm_peak_hours_available | pm_peak_total_hours | _24_hour_availability | _24_hour_hours_available | _24_hour_total_hours | station_name | station_mrn | station_complex_name | station_complex_mrn | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-12-01 | Queens | Elevator | EL469 | 2.0 | 2.0 | 0.0 | 0.0 | 5.0 | 1.000000 | ... | 1.000000 | 124.000000 | 124.0 | 0.994915 | 740.216667 | 744.0 | COURTSQ-XTN-G/7 | 281.0 | Court Sq (E,G,M,7) | 606.0 |
| 1 | 2023-12-01 | Manhattan | Elevator | EL116 | 3.0 | 3.0 | 0.0 | 0.0 | 25.0 | 0.983199 | ... | 1.000000 | 124.000000 | 124.0 | 0.982303 | 730.833333 | 744.0 | 190ST-8AV-A | 145.0 | 190 St - Station | 145.0 |
| 2 | 2023-12-01 | Manhattan | Elevator | EL443 | 7.0 | 5.0 | 2.0 | 0.0 | 84.0 | 0.983871 | ... | 0.981048 | 121.650000 | 124.0 | 0.975358 | 725.666667 | 744.0 | LEXINGTONAV/63ST-63S-F/Q | 223.0 | Lexington Av/63 St - Station | 223.0 |
| 3 | 2023-12-01 | Manhattan | Elevator | EL217 | 5.0 | 2.0 | 3.0 | 2.0 | 283.0 | 0.961962 | ... | 0.975806 | 121.000000 | 124.0 | 0.969377 | 721.216667 | 744.0 | 14ST-UNIONSQ-LEX-4/5/6 | 406.0 | 14 St-Union Sq (L,N,Q,R,4,5,6) | 602.0 |
| 4 | 2023-12-01 | Bronx | Elevator | EL515 | 2.0 | 1.0 | 1.0 | 0.0 | 3.0 | 1.000000 | ... | 0.983602 | 121.966667 | 124.0 | 0.993347 | 739.050000 | 744.0 | E149ST-PEL-6 | 373.0 | E 149 St - Station | 373.0 |
5 rows × 22 columns
# elevator outages by station
# Some stations have multiple elevators, so first average across all
# elevators in a station for each month, then average those monthly values
# per station over the filtered period.
# numeric_only=True keeps the mean() calls working on pandas 2.x, where
# averaging non-numeric columns raises instead of silently dropping them.
avg_outages_by_station = elevator_outages_df.groupby(by=['station_mrn', 'month']).mean(numeric_only=True).reset_index().groupby(by='station_mrn').mean(numeric_only=True).round(2).reset_index()
avg_outages_by_station.columns
# keep only the outage-count and availability columns of interest
avg_outages_by_station = avg_outages_by_station[[
    'station_mrn',
    'total_outages',
    'scheduled_outages', 'unscheduled_outages',
    'am_peak_availability',
    'pm_peak_availability',
    '_24_hour_availability',
]]
avg_outages_by_station.head()
| station_mrn | total_outages | scheduled_outages | unscheduled_outages | am_peak_availability | pm_peak_availability | _24_hour_availability | |
|---|---|---|---|---|---|---|---|
| 0 | 2.0 | 3.01 | 1.77 | 1.24 | 0.99 | 0.99 | 0.98 |
| 1 | 9.0 | 4.18 | 1.72 | 2.46 | 0.97 | 0.98 | 0.97 |
| 2 | 11.0 | 3.53 | 2.28 | 1.25 | 0.97 | 0.98 | 0.96 |
| 3 | 12.0 | 3.65 | 1.96 | 1.68 | 0.98 | 0.98 | 0.97 |
| 4 | 15.0 | 3.46 | 2.40 | 1.06 | 0.97 | 0.98 | 0.97 |
# convert station_mrn to int type for merge
avg_outages_by_station.station_mrn = avg_outages_by_station.station_mrn.astype('int')
# keep only the station columns needed for plotting
stations_gdf_temp = stations_gdf[[
    'Station ID',
    'GTFS Longitude', 'GTFS Latitude',
    'Station', 'Borough', 'Daytime Routes',
    'Structure',
    'ADA_labels']]
# re-group by station_mrn (guards against ids collapsing in the float->int
# cast), then left-join outage stats onto the station list
station_outages = pd.merge(left=stations_gdf_temp, left_on='Station ID',
                           right=avg_outages_by_station.groupby(by='station_mrn').mean().reset_index(), right_on='station_mrn',
                           how='left')
# only plotting ADA stations
station_outages = station_outages[station_outages.ADA_labels == 'ADA-compliant']
# stations with no recorded outages get 0; fillna with assignment replaces
# the deprecated chained inplace .replace(np.NaN, ...)
station_outages.total_outages = station_outages.total_outages.fillna(0)
# stations with the most average monthly elevator outages
station_outages.sort_values(by='total_outages', ascending=False).head(5)
| Station ID | GTFS Longitude | GTFS Latitude | Station | Borough | Daytime Routes | Structure | ADA_labels | station_mrn | total_outages | scheduled_outages | unscheduled_outages | am_peak_availability | pm_peak_availability | _24_hour_availability | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 430 | 430 | -73.893064 | 40.824073 | Simpson St | Bx | 2 5 | Elevated | ADA-compliant | 430.0 | 6.50 | 2.94 | 3.56 | 0.95 | 0.95 | 0.94 |
| 392 | 392 | -73.937594 | 40.804138 | 125 St | M | 4 5 6 | Subway | ADA-compliant | 392.0 | 5.66 | 2.29 | 3.36 | 0.97 | 0.96 | 0.96 |
| 310 | 310 | -73.972323 | 40.793919 | 96 St | M | 1 2 3 | Subway | ADA-compliant | 310.0 | 5.38 | 2.69 | 2.69 | 0.97 | 0.97 | 0.96 |
| 421 | 421 | -73.866256 | 40.877850 | Gun Hill Rd | Bx | 2 5 | Elevated | ADA-compliant | 421.0 | 4.97 | 2.67 | 2.30 | 0.95 | 0.94 | 0.93 |
| 221 | 221 | -73.942836 | 40.754203 | 21 St-Queensbridge | Q | F | Subway | ADA-compliant | 221.0 | 4.96 | 2.01 | 2.95 | 0.98 | 0.97 | 0.97 |
# stations with elevators that are least available in the AM peak hours
station_outages.sort_values(by='am_peak_availability', ascending=True).head(5)
| Station ID | GTFS Longitude | GTFS Latitude | Station | Borough | Daytime Routes | Structure | ADA_labels | station_mrn | total_outages | scheduled_outages | unscheduled_outages | am_peak_availability | pm_peak_availability | _24_hour_availability | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 359 | 359 | -73.947642 | 40.632836 | Flatbush Av-Brooklyn College | Bk | 2 5 | Subway | ADA-compliant | 359.0 | 4.47 | 2.47 | 2.00 | 0.95 | 0.95 | 0.94 |
| 421 | 421 | -73.866256 | 40.877850 | Gun Hill Rd | Bx | 2 5 | Elevated | ADA-compliant | 421.0 | 4.97 | 2.67 | 2.30 | 0.95 | 0.94 | 0.93 |
| 430 | 430 | -73.893064 | 40.824073 | Simpson St | Bx | 2 5 | Elevated | ADA-compliant | 430.0 | 6.50 | 2.94 | 3.56 | 0.95 | 0.95 | 0.94 |
| 114 | 115 | -74.002578 | 40.739777 | 8 Av | M | L | Subway | ADA-compliant | 115.0 | 4.47 | 2.68 | 1.79 | 0.96 | 0.96 | 0.95 |
| 317 | 317 | -73.987495 | 40.755290 | Times Sq-42 St | M | 1 2 3 | Subway | ADA-compliant | 317.0 | 4.43 | 2.38 | 2.05 | 0.96 | 0.97 | 0.96 |
# stations with elevators that are least available in the PM peak hours
station_outages.sort_values(by='pm_peak_availability', ascending=True).head(5)
| Station ID | GTFS Longitude | GTFS Latitude | Station | Borough | Daytime Routes | Structure | ADA_labels | station_mrn | total_outages | scheduled_outages | unscheduled_outages | am_peak_availability | pm_peak_availability | _24_hour_availability | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 421 | 421 | -73.866256 | 40.877850 | Gun Hill Rd | Bx | 2 5 | Elevated | ADA-compliant | 421.0 | 4.97 | 2.67 | 2.30 | 0.95 | 0.94 | 0.93 |
| 430 | 430 | -73.893064 | 40.824073 | Simpson St | Bx | 2 5 | Elevated | ADA-compliant | 430.0 | 6.50 | 2.94 | 3.56 | 0.95 | 0.95 | 0.94 |
| 195 | 195 | -73.825798 | 40.685951 | Ozone Park-Lefferts Blvd | Q | A | Elevated | ADA-compliant | 195.0 | 4.78 | 2.03 | 2.75 | 0.96 | 0.95 | 0.95 |
| 359 | 359 | -73.947642 | 40.632836 | Flatbush Av-Brooklyn College | Bk | 2 5 | Subway | ADA-compliant | 359.0 | 4.47 | 2.47 | 2.00 | 0.95 | 0.95 | 0.94 |
| 97 | 98 | -73.941126 | 40.700260 | Flushing Av | Bk | J M | Elevated | ADA-compliant | 98.0 | 4.26 | 2.30 | 1.96 | 0.96 | 0.96 | 0.95 |
# plot ADA subway stations colored by average 24-hour elevator availability
station_outages['marker_size'] = 3
fig = px.scatter_mapbox(station_outages,
lon=station_outages['GTFS Longitude'],
lat=station_outages['GTFS Latitude'],
hover_data={'Station':True, 'GTFS Latitude':False,'GTFS Longitude':False, 'ADA_labels':False, 'total_outages':True, 'marker_size':False},
color="_24_hour_availability",
color_continuous_scale='Reds_r',
size = 'marker_size',
size_max = 8,
width=1200, height=1000,
zoom=10)
# plot subway lines.
# list.extend replaces np.append, which re-copies the whole array on every
# call and makes the accumulation quadratic.
lats = []
lons = []
names = []
for feature, name in zip(lines_gdf.geometry, lines_gdf.name):
    if isinstance(feature, shapely.geometry.linestring.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.multilinestring.MultiLineString):
        linestrings = feature.geoms
    else:
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        lats.extend(y)
        lons.extend(x)
        names.extend([name] * len(y))
        # None breaks the polyline between segments
        lats.append(None)
        lons.append(None)
        names.append(None)
fig.add_trace(px.line_mapbox(lat=lats, lon=lons,
labels={'hover_data_0':'lines'},
hover_name=names,
).data[0])
fig.update_traces(line={'width': 1})
# reverse trace order so stations render on top of the lines
fig.data = fig.data[::-1]
fig.show()
# NY state census-tract shapefile.
# Forward slashes keep the path portable — the original
# 'data\census_data\...' contained the invalid escape sequence '\c'.
ny_gdf = gpd.read_file('data/census_data/cb_2022_36_tract_500k.zip')
# drop irrelevant columns in shapefile
drop_columns = ['STATEFP', 'COUNTYFP', 'TRACTCE', 'GEOID', 'NAME',
                'NAMELSAD', 'STUSPS', 'STATE_NAME', 'LSAD', 'AWATER']
ny_gdf.drop(columns=drop_columns, inplace=True)
# filter to the five NYC counties (one per borough)
nyc_counties = ['New York County', 'Queens County', 'Richmond County',
                'Kings County', 'Bronx County']
nyc_gdf = ny_gdf[ny_gdf['NAMELSADCO'].isin(nyc_counties)]
nyc_gdf.head()
| AFFGEOID | NAMELSADCO | ALAND | geometry | |
|---|---|---|---|---|
| 31 | 1400000US36005013500 | Bronx County | 167401 | POLYGON ((-73.90508 40.82711, -73.90385 40.828... |
| 32 | 1400000US36005009200 | Bronx County | 310475 | POLYGON ((-73.85773 40.83356, -73.85465 40.833... |
| 33 | 1400000US36005005400 | Bronx County | 196516 | POLYGON ((-73.88389 40.82981, -73.88235 40.831... |
| 34 | 1400000US36005036501 | Bronx County | 95827 | POLYGON ((-73.88832 40.84518, -73.88723 40.844... |
| 35 | 1400000US36005044902 | Bronx County | 156698 | POLYGON ((-73.87136 40.90004, -73.86900 40.901... |
# write the NYC tracts out as GeoJSON so plotly's choropleth can use them
nyc_gdf.to_file("nyc_tracts_compressed.json", driver='GeoJSON')
# prepare GeoJSON
with open('nyc_tracts_compressed.json') as f:
nyc_tracts = json.load(f)
# check the top-level keys of each GeoJSON feature
nyc_tracts["features"][0].keys()
dict_keys(['type', 'properties', 'geometry'])
# The 'properties' key is what we are interested in: almost all the
# columns of the GeoDataFrame are present there.
nyc_tracts["features"][0]['properties'].keys()
dict_keys(['AFFGEOID', 'NAMELSADCO', 'ALAND'])
# Demographic data: population and ambulatory-difficulty population for
# NYC census tracts in 2022 (ACS table B18105). Forward slashes keep the
# path portable — 'data\census_data\...' contained the invalid escape '\c'.
demo_2022 = pd.read_csv('data/census_data/ACSDT5Y2022.B18105-Data.csv', header=1)
# drop the 'Margin of Error' columns
demo_2022 = demo_2022[demo_2022.columns.drop(list(demo_2022.filter(regex='Margin')))]
# calculate percentage of population with ambulatory difficulty in each census tract
demo_2022['With_Ambulatory_Difficulty'] = demo_2022.filter(regex='With an ambulatory').sum(axis=1)
demo_2022['Perc_with_Ambulatory_Difficulty'] = demo_2022['With_Ambulatory_Difficulty'] / demo_2022['Estimate!!Total:']
demo_2022 = demo_2022[['Geography', 'Estimate!!Total:', 'With_Ambulatory_Difficulty', 'Perc_with_Ambulatory_Difficulty']]
# drop census tracts that have no population
demo_2022 = demo_2022[demo_2022['Estimate!!Total:'] > 0]
# drop tracts where over 30% of the population is affected — likely data
# issues (very small tracts or nursing homes)
demo_2022 = demo_2022[demo_2022.Perc_with_Ambulatory_Difficulty < .3]
demo_2022.head()
| Geography | Estimate!!Total: | With_Ambulatory_Difficulty | Perc_with_Ambulatory_Difficulty | |
|---|---|---|---|---|
| 1 | 1400000US36005000200 | 4646 | 298 | 0.064141 |
| 2 | 1400000US36005000400 | 6001 | 344 | 0.057324 |
| 3 | 1400000US36005001600 | 5684 | 643 | 0.113125 |
| 4 | 1400000US36005001901 | 1987 | 178 | 0.089582 |
| 5 | 1400000US36005001902 | 1760 | 221 | 0.125568 |
# merge demographic data with shapefile
# NOTE(review): demo_gdf is never referenced below — the choropleths plot
# demo_2022 against the GeoJSON directly; confirm whether this merge is
# still needed
demo_gdf = nyc_gdf.merge(demo_2022, left_on='AFFGEOID', right_on='Geography', how='left')
# choropleth of percent of population with ambulatory difficulty per tract
fig = px.choropleth_mapbox(demo_2022, geojson=nyc_tracts,
locations='Geography',
color='Perc_with_Ambulatory_Difficulty',
color_continuous_scale="Viridis_r",
opacity=0.25,
featureidkey="properties.AFFGEOID",
center = {"lat": 40.75, "lon": -74},
zoom=10,
height = 800, width = 1200,
)
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
# Final combined map: ADA-labeled subway stations + subway lines +
# ambulatory-difficulty choropleth underneath.
stations_gdf['marker_size'] = 3
fig = px.scatter_mapbox(stations_gdf,
lon=stations_gdf['GTFS Longitude'],
lat=stations_gdf['GTFS Latitude'],
hover_data={'Station':True, 'GTFS Latitude':False,'GTFS Longitude':False, 'ADA_labels':False},
color="ADA_labels",
color_continuous_scale="BuGn",
labels={"ADA_labels": "Subway Station ADA Compliance"},
size = 'marker_size',
size_max = 5,
width=1200, height=1000,
zoom=10)
# plot subway lines.
# list.extend replaces np.append, which re-copies the whole array on every
# call and makes the accumulation quadratic.
lats = []
lons = []
names = []
for feature, name in zip(lines_gdf.geometry, lines_gdf.name):
    if isinstance(feature, shapely.geometry.linestring.LineString):
        linestrings = [feature]
    elif isinstance(feature, shapely.geometry.multilinestring.MultiLineString):
        linestrings = feature.geoms
    else:
        continue
    for linestring in linestrings:
        x, y = linestring.xy
        lats.extend(y)
        lons.extend(x)
        names.extend([name] * len(y))
        # None breaks the polyline between segments
        lats.append(None)
        lons.append(None)
        names.append(None)
fig.add_trace(px.line_mapbox(lat=lats, lon=lons,
labels={'hover_data_0':'lines'},
hover_name=names,
).data[0])
fig.update_traces(line={'width': 1})
# add the choropleth as an extra trace on the same figure
fig.add_trace(px.choropleth_mapbox(demo_2022, geojson=nyc_tracts,
locations='Geography',
color='Perc_with_Ambulatory_Difficulty',
color_continuous_scale="BuGn",
opacity=0.25,
featureidkey="properties.AFFGEOID",
center = {"lat": 40.75, "lon": -74},
zoom=10,
height = 800, width = 1200,
).data[0])
# reverse trace order so stations render on top, choropleth at the bottom
fig.data = fig.data[::-1]
fig.show()